The MNet R package requires R version 4.0.0 or higher.
ggcor is from GitHub. Hence, it is recommended to install the package before installing MetaboDiff.
devtools::install_github("Github-Yilei/ggcor")
devtools::install_github("hfang-bristol/XGR")
MNet is available for all operating systems and can be installed via the compressed files.
install.packages("MNet_0.1.0.tar.gz")
The metabolism and transcriptome network analysis mainly includes several parts, some functions are from our package “MNet”
In this tutorial we are going to analyze the dataset of metabolic activities published in Terunuma et al (2014). In this paper the Authors characterized the transcriptomic and metabolomic profile of several types of human breast tumors to uncovered metabolite signatures using an untargeted discovery approach. They found that the oncometabolite 2-hydroxyglutarate (2HG) accumulates at high levels in a subset of tumors and discovered an association between increased 2HG levels and MYC pathway activation in breast cancer. As an example of mixed metabolites and genes analyses here we are going to use both the dataset of metabolite abundances and gene expression as reported in the supplementary files.
gene_url <-
url("https://romualdi.bio.unipd.it/wp-uploads/2018/04/Terunuma_gene_expr.txt")
gexpr <- read.table(gene_url, header = TRUE, sep = "\t", row.names = 1,
stringsAsFactors = FALSE)
metab_url <-
url("https://romualdi.bio.unipd.it/wp-uploads/2018/04/Terunuma_metabolite_expr.txt")
mexpr <- read.table(metab_url, header = TRUE, sep = "\t", row.names = NULL,
stringsAsFactors = FALSE)
idcols <- 1:4
nas_by_row <-
rowSums(is.na(data.matrix(mexpr[,-idcols]))) /
(ncol(mexpr) - length(idcols))
dexpr <- mexpr[nas_by_row < 0.5,]
sum(rowSums(is.na(dexpr[,-idcols])) == 0)
iexpr <- cbind(dexpr[,idcols],
impute::impute.knn(data.matrix(dexpr[,-idcols]))$data,
stringsAsFactors = FALSE)
sum(is.na(iexpr[,-idcols]))
iexpr[,idcols][iexpr[,idcols] == ""] <- NA
head(iexpr[,idcols])
valid_cas <- !is.na(iexpr$KEGG_ID)
KEGG_col <- which(names(iexpr) == "KEGG_ID")
cexpr <- iexpr[valid_cas, c(KEGG_col, seq.int(5, ncol(iexpr)))]
mexpr <- cexpr %>%
tibble::remove_rownames() %>%
tidyr::separate(KEGG_ID,sep=",","KEGG_ID") %>%
dplyr::distinct(KEGG_ID,.keep_all = T) %>%
dplyr::left_join(all_kegg_id,by=c("KEGG_ID"="ENTRY")) %>%
dplyr::distinct(KEGG_ID,.keep_all = TRUE) %>%
dplyr::filter(!is.na(NAME)) %>%
tibble::column_to_rownames("NAME") %>%
dplyr::select(-c("KEGG_ID","source"))
sample_rna <- data.table::fread("TNBC/sample_rna.txt") %>%
as.data.frame() %>%
tidyr::separate("Sample Name",sep="[.]",c("Sample_name1","Tissue")) %>%
dplyr::mutate(name=ifelse(Tissue=="TumorTissue",paste0(Isolate,"_T"),
paste0(Isolate,"_N"))) %>%
dplyr::mutate(name1=name) %>%
tidyr::separate("name1",sep="_",c("name1","Type")) %>%
dplyr::select(c("Run","name","Type"))
rna_data <- readRDS("TNBC/TNBC_VST.rds") %>%
as.data.frame() %>%
dplyr::select(sample_rna$Run)
names(rna_data) <- sample_rna$name
meta_data <- data.table::fread("TNBC/TNBC-meta.txt") %>%
as.data.frame()
sample_intersect <- intersect(names(rna_data),names(meta_data))
a <- name2keggid(meta_data$Name)
a_temp <- a %>%
tidyr::separate_rows(kegg_id) %>%
dplyr::mutate(kegg_id=ifelse(is.na(kegg_id),name,kegg_id))
meta_data <- meta_data %>%
dplyr::left_join(a_temp,by=c("Name"="name")) %>%
dplyr::select(-"Name") %>%
dplyr::distinct(kegg_id,.keep_all = TRUE) %>%
tibble::column_to_rownames("kegg_id") %>%
dplyr::select(sample_intersect)
rna_data <- rna_data %>%
dplyr::select(sample_intersect)
save(meta_data,file="TNBC/meta_data.rda")
save(rna_data,file="TNBC/rna_data.rda")
#load("result/metabo_dat.rda")
load("result/mexpr.rda")
#mexpr <- data.matrix(cexpr[,-1])
tumor_indices <- grep("TUMOR", colnames(mexpr))
normal_indices <- grep("NORMAL",colnames(mexpr))
group <- colnames(mexpr)
group[tumor_indices] <- "tumor"
group[normal_indices] <- "normal"
Change the metabolite name to refmet name RefMet: A Reference list of Metabolite names The main objective of RefMet is to provide a standardized reference nomenclature for both discrete metabolite structures and metabolite species identified by spectroscopic techniques in metabolomics experiments.
refmetid_result <- name2refmet(rownames(mexpr))
write.table(refmetid_result,"result/refmetid_result.txt",quote=F,sep="\t",row.names=F)
search the kegg id corresponding to the metabolites name
keggid_result <- name2keggid(rownames(mexpr)) %>%
tidyr::separate_rows(kegg_id,sep=";")
write.table(keggid_result,"result/keggid_result.txt",quote=F,sep="\t",row.names=F)
search the kegg pathway corresponding to the metabolite name
result_all <- name2pathway(rownames(mexpr))
##### the output is the each metabolite related pathway
result_name2pathway <- result_all$name2pathway
write.table(result_name2pathway,"result/keggpathway_result.txt",quote=F,sep="\t",row.names=F)
keggpathway_result <- keggid2pathway(keggid_result$kegg_id)
head(keggpathway_result)
the PCA of the data
### the pca plot
out_dir <- "result"
p_PCA <- pPCA(mexpr,group)
ggplot2::ggsave(paste0(out_dir,"/1.PCA_1.pdf"),p_PCA$p1)
ggplot2::ggsave(paste0(out_dir,"/1.PCA_2.pdf"),p_PCA$p2)
ggplot2::ggsave(paste0(out_dir,"/1.PCA_3.pdf"),p_PCA$p3)
ggplot2::ggsave(paste0(out_dir,"/1.PCA_1.png"),p_PCA$p1)
ggplot2::ggsave(paste0(out_dir,"/1.PCA_2.png"),p_PCA$p2)
ggplot2::ggsave(paste0(out_dir,"/1.PCA_3.png"),p_PCA$p3)
using the function differential_metabolites in my R packages “MNet”
diff_result <- DM(mexpr,group)
dev.off()
write.table(diff_result,"result/DM_result.txt",quote=F,row.names=F,sep="\t")
## filter the differential metabolites by default fold change >1.5 or < 1/1.5 ,fdr < 0.05 and VIP>1
diff_result_filter <- diff_result %>%
dplyr::filter(fold_change >1.3 | fold_change < 1/1.3) %>%
dplyr::filter(fdr_wilcox<0.1) %>%
dplyr::filter(vip>0.8)
utils::write.table(diff_result,paste0(out_dir,"/2.all_TumorvsNormal.txt"),quote=F,row.names=F,sep="\t")
utils::write.table(diff_result_filter,paste0(out_dir,"/2.diff_TumorvsNormal.txt"),quote=F,row.names=F,sep="\t")
library(dplyr)
out_dir="result"
df <- data.table::fread(paste0("result","/2.diff_TumorvsNormal.txt")) %>%
as.data.frame()
df %>% DT::datatable(options=list(pageLength=5,searchHighlight=T,buttons=c('csv','copy'), dom='Bt',scrollX=T,fixedColumns=list(leftColumns=1)), style='default', caption="", rownames=FALSE, escape=F, extensions=c('Buttons','FixedColumns'))
the volcano plot of metabolites using the function “pVolcano” in the package “MNet”
p_volcano <- pVolcano(diff_result,foldchange=1.5)
#p_volcano
ggplot2::ggsave(paste0(out_dir,"/3.volcano.pdf"),p_volcano)
ggplot2::ggsave(paste0(out_dir,"/3.volcano.png"),p_volcano)
the heatmap plot of differentital metabolites using the function “plot_heatmap” in our package “MNet”
mexpr_diff <- mexpr[rownames(mexpr) %in% diff_result_filter$name,]
p_heatmap <- pHeatmap(mexpr_diff,group,fontsize_row=5,fontsize_col=4,clustering_method="complete",clustering_distance_cols="euclidean")
ggplot2::ggsave(paste0(out_dir,"/3.heatmap.pdf"),p_heatmap,width=10,height=8)
ggplot2::ggsave(paste0(out_dir,"/3.heatmap.png"),p_heatmap,width=10,height=8)
the zscore plot of differentital metabolites using the function “plot_zscore” in our package “MNet”
p_zscore <- pZscore(mexpr_diff,group)
#p_zscore
ggplot2::ggsave(paste0(out_dir,"/3.z_score.pdf"),p_zscore,width=5,height=5)
ggplot2::ggsave(paste0(out_dir,"/3.z_score.png"),p_zscore,width=5,height=5)
using logistic model to select the feature
mexpr_t <- mexpr_diff %>%
t() %>%
data.frame() %>%
dplyr::mutate(group=group)
mexpr1 <- mexpr_t[which(mexpr_t$group==unique(group)[1]),]
mexpr2 <- mexpr_t[which(mexpr_t$group==unique(group)[2]),]
train_sub1 = sample(nrow(mexpr1),8/10*nrow(mexpr1))
train_sub2 = sample(nrow(mexpr2),8/10*nrow(mexpr2))
train_data = rbind(mexpr1[train_sub1,],mexpr2[train_sub2,])
test_data <- rbind(mexpr1[-train_sub1,],mexpr2[-train_sub2,])
#train_sub = sample(nrow(mexpr_t),8/10*nrow(mexpr_t))
#train_data <- mexpr_t[train_sub,]
#test_data <- mexpr_t[-train_sub,]
ML_logic(train_data,test_data,out_dir=paste0(out_dir,"/4.machine_learning_logic/"))
using random forest to do the features selection
mexpr_t <- as.data.frame(t(mexpr))
mexpr_t$group <- group
result <- ML_RF(mexpr_t)
ggsave("result/machine_learning_RF_AUC.pdf",result$p)
ggsave("result/machine_learning_RF_AUC.png",result$p)
the differential abundance (DA) score analysis
## 4.1 the differential metabolites' DA score
metabolites_kegg_id_temp <- name2keggid(diff_result$name)
metabolites_kegg_id <- metabolites_kegg_id_temp %>%
dplyr::distinct(name,.keep_all=TRUE) %>%
dplyr::left_join(diff_result,by="name") %>%
tidyr::separate_rows(kegg_id,sep=";")
utils::write.table(metabolites_kegg_id,paste0(out_dir,"/2.all_TumorvsNormal_kegg_id.txt"),quote=F,row.names=F,sep="\t")
increase_metabolites <- metabolites_kegg_id %>%
dplyr::filter(fold_change>3/2 ) %>%
dplyr::filter(fdr_wilcox<0.05) %>%
dplyr::filter(vip>1) %>%
dplyr::filter(!is.na(kegg_id)) %>%
dplyr::pull(kegg_id)
decrease_metabolites <- metabolites_kegg_id %>%
dplyr::filter(fold_change<2/3) %>%
dplyr::filter(fdr_wilcox<0.05) %>%
dplyr::filter(vip>1) %>%
dplyr::filter(!is.na(kegg_id)) %>%
dplyr::pull(kegg_id)
all_metabolites <- metabolites_kegg_id %>%
dplyr::filter(!is.na(kegg_id)) %>%
dplyr::pull(kegg_id)
DA_result <- DAscore(increase_metabolites,decrease_metabolites,all_metabolites,min_measured_num = 0,sort_plot = "classification")
#DA_result$result
#DA_result$p
ggplot2::ggsave(paste0(out_dir,"/5.DA_score.pdf"),DA_result$p,width=10,height=8)
ggplot2::ggsave(paste0(out_dir,"/5.DA_score.png"),DA_result$p,width=10,height=8)
utils::write.table(DA_result$result,paste0(out_dir,"/5.DA_score.txt"),quote=F,row.names=F,sep="\t")
## 4.2 pathway
result_all <- name2pathway(diff_result_filter$name)
##### the output is the each metabolite related pathway
result_name2pathway <- result_all$name2pathway
##### the related pathway and the metabolites in the pathway
result_pathway <- result_all$pathway
##### the metabolites and its kegg id
kegg_name <- result_all$kegg_name
utils::write.table(result_name2pathway,paste0(out_dir,"/6.diff_name2pathway.txt"),quote=F,row.names=F,sep="\t")
utils::write.table(result_pathway,paste0(out_dir,"/6.diff_pathway_info.txt"),quote=F,row.names=F,sep="\t")
kegg_pathway_filter <- kegg_pathway %>%
dplyr::filter(!is.na(pathway_type)) %>%
dplyr::select(c("ENTRY","PATHWAY"))
kegg_id_need <- c(increase_metabolites,decrease_metabolites)
xgr_output <- xgr(kegg_id_need,kegg_pathway_filter)
#xgr_result <- xgr_output$output
#xgr_output$gp
ggplot2::ggsave(paste0(out_dir,"/6.diff_pathway_xgr.pdf"),xgr_output$gp)
ggplot2::ggsave(paste0(out_dir,"/6.diff_pathway_xgr.png"),xgr_output$gp)
dir.create("result/pathview")
setwd("result/pathview")
tt <- diff_result %>%
dplyr::filter(name %in% kegg_id_need)
name <- tt %>%
dplyr::pull(name)
value <- tt %>%
dplyr::mutate(value=log2(fold_change)) %>%
dplyr::pull(value)
names(value) <- name
pPathview(cpd.data=value)
the time series analysis using mFuzz
TSMfuzz(mexpr,out_dir="result/mfuzz",range=c(4,12))
the time series using supraHex
TSSupraHex(mexpr,newdata=NULL,out_dir="result/supraHex/")
the time series analysis of clinical biomarker
time_series_ALT <- pCliTS(clinical_index,"ALT")
ggsave("result/clinical_time_series.pdf",time_series_ALT)
the correlation analysis bewteen metabolites
dat_cor <- cor(t(mexpr))
pdf("result/correlation_metabolism.pdf")
corrplot::corrplot(dat_cor,type = "upper",order="hclust",tl.cex=0.5)
dev.off()
the clinical biomarkers correlation analysis
clinical_cor <- cor(clinical)
pdf("result/correlation_clinical.pdf")
corrplot::corrplot(clinical_cor, order = "hclust",hclust.method = "ward.D2",type = "upper")
dev.off()
mexpr1 <- t(mexpr)
clinical_data <- as.data.frame(t(clinical))
metabolite_data <- as.data.frame(mexpr1)
p <- plot_correlation_clinical_metabolite_mantel(clinical_data,metabolite_data)
ggsave("result/correlation_metabolites_clinical.pdf",p)
cor_result <- data.frame()
dir.create("result/correlation/",recursive =TRUE)
for (i in 1:ncol(clinical)) {
for (j in 1:nrow(mexpr)) {
cor_temp <- cor.test(as.numeric(clinical[,i]),log2(as.numeric(mexpr[j,])))
cor_p <- cor_temp$p.value
cor_cor <- cor_temp$estimate
cor_result_temp <- data.frame(metabolites=rownames(mexpr)[j],clinical=colnames(clinical)[i],cor=cor_cor,p=cor_p)
cor_result <- rbind(cor_result,cor_result_temp)
}
}
write.table(cor_result,"result/correlation/correlation_metabolites_clinical.txt",quote=F,row.names=F,sep="\t")
cor_result_filter <- cor_result %>%
dplyr::filter(p<0.05)
for (i in 1:nrow(cor_result_filter)) {
mexpr_filter <- mexpr[which(rownames(mexpr)==cor_result_filter$metabolites[i]),]
clinical_filter <- clinical[,which(colnames(clinical)==cor_result_filter$clinical[i])]
dat <- as.data.frame(cbind(log2(t(mexpr_filter)),clinical_filter))
colnames(dat) <- c("metabolites","clinical")
p <- ggpubr::ggscatter(dat, x = "metabolites", y = "clinical",
add = "reg.line", conf.int = TRUE,
cor.coef = TRUE, cor.method = "pearson",
xlab =paste0("the log2 value of ",cor_result_filter$metabolites[i]), ylab = paste0("the value of ",cor_result_filter$clinical[i]))
clinical_name = gsub("[/]","",cor_result_filter$clinical[i])
ggsave(paste0("result/correlation/",clinical_name,"_",cor_result_filter$metabolites[i],".pdf"),p)
}
p <- plot_correlation_clinical_metabolite(clinical,mexpr,"CRP","1-Methyladenosine")
ggsave("result/correlation/CRP_1-Methyladenosine.pdf",p)
the survival analysis
p <- survCli(clinical_survival)
pdf("result/survival_OS.pdf")
p$p_OS
dev.off()
pdf("result/survival_RFS.pdf")
ggsave("result/survival_RFS.pdf",p$p_RFS)
dev.off()
pdf("result/survival_EFS.pdf")
ggsave("result/survival_EFS.pdf",p$p_EFS)
dev.off()
automatic classify the samples by each selected metabolites by mean or median, and then plot the survival
metabolites <- c("MT.ND4","MT.ND4L","MT.ND6")
survMet(dat,metabolites,cluster_method="mean",out_dir="survival/metabolites/")
the cox analysis about clinical
result <- MetCox(dat)
write.table(result,"result/clinical_cox.txt",quote=F,sep="\t",row.names = F)
name <- c("C15973","C16254","MDH1")
result <- pathway_gene_metabolite(name)
ggsave("result/pathway_gene_metabolite.png",result$gp)
result$output
load("result/gene_dat.rda")
load("result/metabo_dat.rda")
group <- rep("normal",length(names(mexpr)))
group[grep("TUMOR",names(mexpr))] <- "tumor"
dat <- rbind(gene_dat,log(mexpr))
result <- mlimma(dat,group)
pdf("result/case1.pdnet.pdf")
pdnet(mexpr,gene_dat,result)
dev.off()
png("result/case1.pdnet.png",width = 8, height = 7, units = 'in', res = 200)
pdnet(mexpr,gene_dat,result,nsize=50)
dev.off()
the case2 dataset for metabolism is from Xiao et al(2022) and the transcriptome dataset is from Jiang et al(2019), we choose the patient that have metabolism data and transcriptome data.
load("TNBC/meta_data.rda")
load("TNBC/rna_data.rda")
group <- rep("normal",length(names(meta_data)))
group[grep("_T",names(meta_data))] <- "tumor"
dat <- rbind(rna_data,meta_data)
result <- mlimma(dat,group)
pdf("TNBC/case2.pdnet.pdf")
pdnet(meta_data,rna_data,result,nsize=50)
dev.off()
png("TNBC/case2.pdnet.png",width = 8, height = 7, units = 'in', res = 200)
pdnet(meta_data,rna_data,result,nsize=50)
dev.off()
sessionInfo()
## R version 4.2.1 (2022-06-23)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Catalina 10.15.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] BiocStyle_2.24.0
##
## loaded via a namespace (and not attached):
## [1] bookdown_0.28 digest_0.6.29 R6_2.5.1
## [4] jsonlite_1.8.0 magrittr_2.0.3 evaluate_0.16
## [7] stringi_1.7.8 cachem_1.0.6 rlang_1.0.4
## [10] cli_3.3.0 rstudioapi_0.13 jquerylib_0.1.4
## [13] bslib_0.4.0 rmarkdown_2.14 tools_4.2.1
## [16] stringr_1.4.0 xfun_0.32 yaml_2.3.5
## [19] fastmap_1.1.0 compiler_4.2.1 BiocManager_1.30.18
## [22] htmltools_0.5.3 knitr_1.39 sass_0.4.2